## Importing relevant Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
!pip install seaborn
import seaborn as sns
import plotly.graph_objects as go
import warnings
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_log_error
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import *
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import fbeta_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import *
import plotly.express as px
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
Requirement already satisfied: seaborn in ./opt/anaconda3/lib/python3.9/site-packages (0.11.2) Requirement already satisfied: pandas>=0.23 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.4.2) Requirement already satisfied: numpy>=1.15 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.21.5) Requirement already satisfied: matplotlib>=2.2 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (3.5.1) Requirement already satisfied: scipy>=1.0 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.7.3) Requirement already satisfied: pillow>=6.2.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (9.0.1) Requirement already satisfied: fonttools>=4.22.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (4.25.0) Requirement already satisfied: cycler>=0.10 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (0.11.0) Requirement already satisfied: kiwisolver>=1.0.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (1.3.2) Requirement already satisfied: python-dateutil>=2.7 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (2.8.2) Requirement already satisfied: packaging>=20.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (21.3) Requirement already satisfied: pyparsing>=2.2.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (3.0.4) Requirement already satisfied: pytz>=2020.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas>=0.23->seaborn) (2021.3) Requirement already satisfied: six>=1.5 in ./opt/anaconda3/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)
#Loaading dataset
Customer_churn = pd.read_csv('/Users/Admin/Desktop/Telco-Customer-Churn.csv')
Customer_churn.head()
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
5 rows × 21 columns
Customer_churn.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 customerID 7043 non-null object 1 gender 7043 non-null object 2 SeniorCitizen 7043 non-null int64 3 Partner 7043 non-null object 4 Dependents 7043 non-null object 5 tenure 7043 non-null int64 6 PhoneService 7043 non-null object 7 MultipleLines 7043 non-null object 8 InternetService 7043 non-null object 9 OnlineSecurity 7043 non-null object 10 OnlineBackup 7043 non-null object 11 DeviceProtection 7043 non-null object 12 TechSupport 7043 non-null object 13 StreamingTV 7043 non-null object 14 StreamingMovies 7043 non-null object 15 Contract 7043 non-null object 16 PaperlessBilling 7043 non-null object 17 PaymentMethod 7043 non-null object 18 MonthlyCharges 7043 non-null float64 19 TotalCharges 7043 non-null object 20 Churn 7043 non-null object dtypes: float64(1), int64(2), object(18) memory usage: 1.1+ MB
## Viewing dataset
Customer_churn
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 6840-RESVB | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | ... | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
| 7039 | 2234-XADUH | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | ... | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
| 7040 | 4801-JZAZL | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | 8361-LTMKD | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
| 7042 | 3186-AJIEK | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | ... | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 21 columns
### Dropping the customer ID colums because it is irrelevant to my analysis
Customer_churn.drop(columns = ['customerID'], inplace = True)
Customer_churn
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
| 7039 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
| 7040 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
| 7042 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 20 columns
# changing Totalcharges datatype from object to int/float.
Customer_churn["TotalCharges"] = pd.to_numeric(Customer_churn["TotalCharges"], errors = "coerce").fillna(0, downcast = "infer")
Customer_churn
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.50 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.50 | No |
| 7039 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.90 | No |
| 7040 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.60 | Yes |
| 7042 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.50 | No |
7043 rows × 20 columns
Customer_churn.drop(Customer_churn[Customer_churn['TotalCharges'] == 0].index,inplace = True)
Customer_churn.reset_index(drop = True, inplace = True)
Customer_churn
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.50 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7027 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.50 | No |
| 7028 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.90 | No |
| 7029 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7030 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.60 | Yes |
| 7031 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.50 | No |
7032 rows × 20 columns
Customer_churn.isnull().sum()
gender 0 SeniorCitizen 0 Partner 0 Dependents 0 tenure 0 PhoneService 0 MultipleLines 0 InternetService 0 OnlineSecurity 0 OnlineBackup 0 DeviceProtection 0 TechSupport 0 StreamingTV 0 StreamingMovies 0 Contract 0 PaperlessBilling 0 PaymentMethod 0 MonthlyCharges 0 TotalCharges 0 Churn 0 dtype: int64
Customer_churn['TotalCharges']
0 29.85
1 1889.50
2 108.15
3 1840.75
4 151.65
...
7027 1990.50
7028 7362.90
7029 346.45
7030 306.60
7031 6844.50
Name: TotalCharges, Length: 7032, dtype: float64
!pip install pandas-profiling
Requirement already satisfied: pandas-profiling in ./opt/anaconda3/lib/python3.9/site-packages (3.4.0) Requirement already satisfied: scipy<1.10,>=1.4.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (1.7.3) Requirement already satisfied: seaborn<0.13,>=0.10.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (0.11.2) Requirement already satisfied: multimethod<1.10,>=1.4 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (1.9) Requirement already satisfied: statsmodels<0.14,>=0.13.2 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (0.13.2) Requirement already satisfied: pandas!=1.4.0,<1.6,>1.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (1.4.2) Requirement already satisfied: pydantic<1.11,>=1.8.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (1.10.2) Requirement already satisfied: matplotlib<3.6,>=3.2 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (3.5.1) Requirement already satisfied: visions[type_image_path]==0.7.5 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (0.7.5) Requirement already satisfied: missingno<0.6,>=0.4.2 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (0.5.1) Requirement already satisfied: jinja2<3.2,>=2.11.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (2.11.3) Requirement already satisfied: tqdm<4.65,>=4.48.2 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (4.64.0) Requirement already satisfied: phik<0.13,>=0.11.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (0.12.2) Requirement already satisfied: requests<2.29,>=2.24.0 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (2.27.1) Requirement already satisfied: PyYAML<6.1,>=5.0.0 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (6.0) Requirement already satisfied: numpy<1.24,>=1.16.0 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (1.21.5) Requirement already satisfied: htmlmin==0.1.12 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (0.1.12) Requirement already satisfied: attrs>=19.3.0 in ./opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (21.4.0) Requirement already satisfied: tangled-up-in-unicode>=0.0.4 in ./opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (0.2.0) Requirement already satisfied: networkx>=2.4 in ./opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (2.7.1) Requirement already satisfied: Pillow in ./opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (9.0.1) Requirement already satisfied: imagehash in ./opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->pandas-profiling) (4.3.1) Requirement already satisfied: MarkupSafe>=0.23 in ./opt/anaconda3/lib/python3.9/site-packages (from jinja2<3.2,>=2.11.1->pandas-profiling) (2.0.1) Requirement already satisfied: fonttools>=4.22.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (4.25.0) Requirement already satisfied: python-dateutil>=2.7 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (1.3.2) Requirement already satisfied: pyparsing>=2.2.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (3.0.4) Requirement already satisfied: packaging>=20.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (21.3) Requirement already satisfied: cycler>=0.10 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.6,>=3.2->pandas-profiling) (0.11.0) Requirement already satisfied: pytz>=2020.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas!=1.4.0,<1.6,>1.1->pandas-profiling) (2021.3) Requirement already satisfied: joblib>=0.14.1 in ./opt/anaconda3/lib/python3.9/site-packages (from phik<0.13,>=0.11.1->pandas-profiling) (1.1.0) Requirement already satisfied: typing-extensions>=4.1.0 in ./opt/anaconda3/lib/python3.9/site-packages (from pydantic<1.11,>=1.8.1->pandas-profiling) (4.1.1) Requirement already satisfied: six>=1.5 in ./opt/anaconda3/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib<3.6,>=3.2->pandas-profiling) (1.16.0) Requirement already satisfied: urllib3<1.27,>=1.21.1 in ./opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (1.26.9) Requirement already satisfied: certifi>=2017.4.17 in ./opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (2022.9.24) Requirement already satisfied: charset-normalizer~=2.0.0 in ./opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in ./opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->pandas-profiling) (3.3) Requirement already satisfied: patsy>=0.5.2 in ./opt/anaconda3/lib/python3.9/site-packages (from statsmodels<0.14,>=0.13.2->pandas-profiling) (0.5.2) Requirement already satisfied: PyWavelets in ./opt/anaconda3/lib/python3.9/site-packages (from imagehash->visions[type_image_path]==0.7.5->pandas-profiling) (1.3.0)
from pandas_profiling import ProfileReport
Customer_churn.profile_report()
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
Customer_churn.corr()
| SeniorCitizen | tenure | MonthlyCharges | TotalCharges | |
|---|---|---|---|---|
| SeniorCitizen | 1.000000 | 0.015683 | 0.219874 | 0.102411 |
| tenure | 0.015683 | 1.000000 | 0.246862 | 0.825880 |
| MonthlyCharges | 0.219874 | 0.246862 | 1.000000 | 0.651065 |
| TotalCharges | 0.102411 | 0.825880 | 0.651065 | 1.000000 |
sns.set(rc = {'figure.figsize':(9,9)})
sns.heatmap(Customer_churn.corr(), annot =True)
<AxesSubplot:>
# Checking the number of customers on different internet packages.
Internet_package = Customer_churn['InternetService'].value_counts()
Result = pd.DataFrame(Internet_package)
# Creating columns for new datafram called results.
Result = Result.reset_index()
Result.columns = ['InternetService', 'Number_of_customers']
Result
| InternetService | Number_of_customers | |
|---|---|---|
| 0 | Fiber optic | 3096 |
| 1 | DSL | 2416 |
| 2 | No | 1520 |
# Plotting a graph to show the percentage of customer churn per InternetService
fig1 = go.Figure(
data=go.Pie(values=Customer_churn['InternetService'].value_counts().values,labels=Customer_churn['InternetService'].value_counts().index,title='Percentage of customer churn for Internet services'))
fig1.show()
# Matching the internet service with the tech support they receive.
sns.set(rc = {'figure.figsize': (10,6)})
sns.countplot(data = Customer_churn, x= 'InternetService', hue = 'TechSupport', palette = 'inferno_r')
<AxesSubplot:xlabel='InternetService', ylabel='count'>
## Plotting a graph to visualize the level of online security received by various Internet services
sns.set(rc = {'figure.figsize': (10,6)})
sns.countplot(data = Customer_churn, x= 'InternetService', hue = 'OnlineSecurity', palette = 'inferno_r')
<AxesSubplot:xlabel='InternetService', ylabel='count'>
sns.set(rc = {'figure.figsize': (10,6)})
sns.countplot(data = Customer_churn, x= 'gender', hue = 'Churn', palette = 'twilight')
<AxesSubplot:xlabel='gender', ylabel='count'>
# Plotting a count plot to visualize the churn amount of dependents
sns.set(rc = {'figure.figsize': (10,6)})
sns.countplot(data = Customer_churn, x= 'Dependents', hue = 'Churn', palette = 'Greens_r')
<AxesSubplot:xlabel='Dependents', ylabel='count'>
# Plotting a count plot to visualize the churn amount of dependents
ax = sns.countplot(data= Customer_churn, x='Dependents', hue='Churn',palette="pastel")
for container in ax.containers:
ax.bar_label(container)
# Checking the number of customers associated with each contract type
Contract_type = Customer_churn['Contract'].value_counts()
Contract_results = pd.DataFrame(Contract_type)
# creating columns for new dataframe called Contract_results.
Contract_results = Contract_results.reset_index()
Contract_results.columns = ['Contract', 'Total_customers']
Contract_results
| Contract | Total_customers | |
|---|---|---|
| 0 | Month-to-month | 3875 |
| 1 | Two year | 1685 |
| 2 | One year | 1472 |
# Matching the Contract type with the churn rate
sns.set(rc = {'figure.figsize': (10,5)})
sns.countplot(data = Customer_churn, x= 'Contract', hue = 'Churn', palette = 'pastel')
<AxesSubplot:xlabel='Contract', ylabel='count'>
# Matching the contract type with the InternetService patronised
sns.set(rc = {'figure.figsize': (10,5)})
sns.countplot(data = Customer_churn, x= 'Contract', hue = 'InternetService', palette = 'magma')
<AxesSubplot:xlabel='Contract', ylabel='count'>
# Boxplot showing the impact of tenure on the churn rate
sns.boxplot(data = Customer_churn, x = 'tenure', y = 'Churn', palette = 'ocean')
<AxesSubplot:xlabel='tenure', ylabel='Churn'>
# Plotting a graph to compare the churn rate of customers with partners and those with no partners.
sns.set(rc = {'figure.figsize': (10,5)})
sns.countplot(data = Customer_churn, x= 'Partner', hue = 'Churn', palette = 'magma')
<AxesSubplot:xlabel='Partner', ylabel='count'>
# Visualizing the churn rate of Males and females
ax = sns.countplot(data= Customer_churn, x='gender', hue='Churn',palette="ocean")
for container in ax.containers:
ax.bar_label(container)
# Matching the device protection service against gender
ax = sns.countplot(data= Customer_churn, x='gender', hue='DeviceProtection',palette="twilight")
sns.set(rc = {'figure.figsize': (15,10)})
for container in ax.containers:
ax.bar_label(container)
# Matching the tech support received against gender
ax = sns.countplot(data= Customer_churn, x='gender', hue='TechSupport',palette="twilight")
sns.set(rc = {'figure.figsize': (15,10)})
for container in ax.containers:
ax.bar_label(container)
sns.set(rc = {'figure.figsize': (10,6)})
sns.countplot(data = Customer_churn, x= 'InternetService', hue = 'Churn', palette = 'winter_r')
<AxesSubplot:xlabel='InternetService', ylabel='count'>
# Getting a concise summary of dataset
Customer_churn.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7032 entries, 0 to 7031 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 gender 7032 non-null object 1 SeniorCitizen 7032 non-null int64 2 Partner 7032 non-null object 3 Dependents 7032 non-null object 4 tenure 7032 non-null int64 5 PhoneService 7032 non-null object 6 MultipleLines 7032 non-null object 7 InternetService 7032 non-null object 8 OnlineSecurity 7032 non-null object 9 OnlineBackup 7032 non-null object 10 DeviceProtection 7032 non-null object 11 TechSupport 7032 non-null object 12 StreamingTV 7032 non-null object 13 StreamingMovies 7032 non-null object 14 Contract 7032 non-null object 15 PaperlessBilling 7032 non-null object 16 PaymentMethod 7032 non-null object 17 MonthlyCharges 7032 non-null float64 18 TotalCharges 7032 non-null float64 19 Churn 7032 non-null object dtypes: float64(2), int64(2), object(16) memory usage: 1.1+ MB
# checking for unique vales for dataset
Customer_churn.nunique()
gender 2 SeniorCitizen 2 Partner 2 Dependents 2 tenure 72 PhoneService 2 MultipleLines 3 InternetService 3 OnlineSecurity 3 OnlineBackup 3 DeviceProtection 3 TechSupport 3 StreamingTV 3 StreamingMovies 3 Contract 3 PaperlessBilling 2 PaymentMethod 4 MonthlyCharges 1584 TotalCharges 6530 Churn 2 dtype: int64
# Dropping the numerical columns in the Customer churn dataset.
sample_df = Customer_churn.drop(['MonthlyCharges','TotalCharges','tenure'],axis=1)
# Creating a dataframe called encoded to apply One hot encoding to categorical data columns
Encoded = OneHotEncoder(sparse=False, drop='first',
handle_unknown='ignore')
# Encoding categorical columns
tfd = make_column_transformer((Encoded, ['gender','Partner','Dependents','PhoneService','MultipleLines','InternetService','OnlineSecurity','OnlineBackup','DeviceProtection','TechSupport','StreamingTV','StreamingMovies','Contract','PaperlessBilling','PaymentMethod']),remainder='passthrough',)
# Assigning categorical data to a single dataframe called categoricals
categoricals = ['gender','Partner','Dependents','PhoneService','MultipleLines','InternetService',
'OnlineSecurity','OnlineBackup','DeviceProtection','TechSupport','StreamingTV',
'StreamingMovies','Contract','PaperlessBilling','PaymentMethod']
Customer_churn_encoded = tfd.fit_transform(sample_df)
Customer_churn_encoded = pd.DataFrame(Customer_churn_encoded, columns=tfd.get_feature_names_out())
# Viewing the encoded dataset
Customer_churn_encoded
| onehotencoder__gender_Male | onehotencoder__Partner_Yes | onehotencoder__Dependents_Yes | onehotencoder__PhoneService_Yes | onehotencoder__MultipleLines_No phone service | onehotencoder__MultipleLines_Yes | onehotencoder__InternetService_Fiber optic | onehotencoder__InternetService_No | onehotencoder__OnlineSecurity_No internet service | onehotencoder__OnlineSecurity_Yes | ... | onehotencoder__StreamingMovies_No internet service | onehotencoder__StreamingMovies_Yes | onehotencoder__Contract_One year | onehotencoder__Contract_Two year | onehotencoder__PaperlessBilling_Yes | onehotencoder__PaymentMethod_Credit card (automatic) | onehotencoder__PaymentMethod_Electronic check | onehotencoder__PaymentMethod_Mailed check | remainder__SeniorCitizen | remainder__Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | No |
| 1 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | No |
| 2 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 | Yes |
| 3 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | No |
| 4 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7027 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 | No |
| 7028 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0 | No |
| 7029 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | No |
| 7030 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1 | Yes |
| 7031 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0 | No |
7032 rows × 28 columns
# Merging the original dataset with the encoded dataset to prepare for modeling
Merged_encoded_data = Customer_churn.join(Customer_churn_encoded)
Merged_encoded_data
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | ... | onehotencoder__StreamingMovies_No internet service | onehotencoder__StreamingMovies_Yes | onehotencoder__Contract_One year | onehotencoder__Contract_Two year | onehotencoder__PaperlessBilling_Yes | onehotencoder__PaymentMethod_Credit card (automatic) | onehotencoder__PaymentMethod_Electronic check | onehotencoder__PaymentMethod_Mailed check | remainder__SeniorCitizen | remainder__Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7027 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | ... | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 | No |
| 7028 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | ... | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0 | No |
| 7029 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | No |
| 7030 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1 | Yes |
| 7031 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | ... | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0 | No |
7032 rows × 48 columns
# Dropping duplicate and old columns
Complete_set = Merged_encoded_data.drop(columns = categoricals)
# Viewing the complete set to be used in modeling and prediction
Complete_set
| SeniorCitizen | tenure | MonthlyCharges | TotalCharges | Churn | onehotencoder__gender_Male | onehotencoder__Partner_Yes | onehotencoder__Dependents_Yes | onehotencoder__PhoneService_Yes | onehotencoder__MultipleLines_No phone service | ... | onehotencoder__StreamingMovies_No internet service | onehotencoder__StreamingMovies_Yes | onehotencoder__Contract_One year | onehotencoder__Contract_Two year | onehotencoder__PaperlessBilling_Yes | onehotencoder__PaymentMethod_Credit card (automatic) | onehotencoder__PaymentMethod_Electronic check | onehotencoder__PaymentMethod_Mailed check | remainder__SeniorCitizen | remainder__Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 29.85 | 29.85 | No | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | No |
| 1 | 0 | 34 | 56.95 | 1889.50 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | No |
| 2 | 0 | 2 | 53.85 | 108.15 | Yes | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 | Yes |
| 3 | 0 | 45 | 42.30 | 1840.75 | No | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | No |
| 4 | 0 | 2 | 70.70 | 151.65 | Yes | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7027 | 0 | 24 | 84.80 | 1990.50 | No | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | ... | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 | No |
| 7028 | 0 | 72 | 103.20 | 7362.90 | No | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | ... | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0 | No |
| 7029 | 0 | 11 | 29.60 | 346.45 | No | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | No |
| 7030 | 1 | 4 | 74.40 | 306.60 | Yes | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1 | Yes |
| 7031 | 0 | 66 | 105.65 | 6844.50 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0 | No |
7032 rows × 33 columns
# Applying label encoding to the target variable
le = LabelEncoder()
Complete_set['Churn'] = le.fit_transform(Complete_set['Churn'])
# Dropping irrelevant columns
Complete_set.drop(columns = ['remainder__Churn'], inplace = True)
Complete_set
| SeniorCitizen | tenure | MonthlyCharges | TotalCharges | Churn | onehotencoder__gender_Male | onehotencoder__Partner_Yes | onehotencoder__Dependents_Yes | onehotencoder__PhoneService_Yes | onehotencoder__MultipleLines_No phone service | ... | onehotencoder__StreamingTV_Yes | onehotencoder__StreamingMovies_No internet service | onehotencoder__StreamingMovies_Yes | onehotencoder__Contract_One year | onehotencoder__Contract_Two year | onehotencoder__PaperlessBilling_Yes | onehotencoder__PaymentMethod_Credit card (automatic) | onehotencoder__PaymentMethod_Electronic check | onehotencoder__PaymentMethod_Mailed check | remainder__SeniorCitizen | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 29.85 | 29.85 | 0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 |
| 1 | 0 | 34 | 56.95 | 1889.50 | 0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 |
| 2 | 0 | 2 | 53.85 | 108.15 | 1 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 |
| 3 | 0 | 45 | 42.30 | 1840.75 | 0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 |
| 4 | 0 | 2 | 70.70 | 151.65 | 1 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7027 | 0 | 24 | 84.80 | 1990.50 | 0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | ... | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 |
| 7028 | 0 | 72 | 103.20 | 7362.90 | 0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | ... | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0 |
| 7029 | 0 | 11 | 29.60 | 346.45 | 0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 |
| 7030 | 1 | 4 | 74.40 | 306.60 | 1 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1 |
| 7031 | 0 | 66 | 105.65 | 6844.50 | 0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0 |
7032 rows × 32 columns
# Splitting data
# Defining the target & predictor variables
X = Complete_set.drop(columns = ["Churn"])
y = Complete_set["Churn"]
# Splitting the dataframe into train and test
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size = 0.25, random_state = 24, stratify = y)
# Joining train dataset for modelling
train_data = X_train.join(y_train, on = X_train.index)
train_data.head()
| SeniorCitizen | tenure | MonthlyCharges | TotalCharges | onehotencoder__gender_Male | onehotencoder__Partner_Yes | onehotencoder__Dependents_Yes | onehotencoder__PhoneService_Yes | onehotencoder__MultipleLines_No phone service | onehotencoder__MultipleLines_Yes | ... | onehotencoder__StreamingMovies_No internet service | onehotencoder__StreamingMovies_Yes | onehotencoder__Contract_One year | onehotencoder__Contract_Two year | onehotencoder__PaperlessBilling_Yes | onehotencoder__PaymentMethod_Credit card (automatic) | onehotencoder__PaymentMethod_Electronic check | onehotencoder__PaymentMethod_Mailed check | remainder__SeniorCitizen | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3996 | 0 | 38 | 20.30 | 743.05 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | 0 |
| 3726 | 0 | 23 | 77.15 | 1759.40 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0 | 0 |
| 3246 | 1 | 37 | 106.75 | 4056.75 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1 | 1 |
| 260 | 1 | 3 | 41.15 | 132.20 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1 | 1 |
| 4377 | 0 | 21 | 20.35 | 422.70 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0 | 0 |
5 rows × 32 columns
train_data.shape
(5274, 32)
# Joining test dataset for modelling
test_data = X_test.join(y_test, on = X_test.index)
test_data.head()
| SeniorCitizen | tenure | MonthlyCharges | TotalCharges | onehotencoder__gender_Male | onehotencoder__Partner_Yes | onehotencoder__Dependents_Yes | onehotencoder__PhoneService_Yes | onehotencoder__MultipleLines_No phone service | onehotencoder__MultipleLines_Yes | ... | onehotencoder__StreamingMovies_No internet service | onehotencoder__StreamingMovies_Yes | onehotencoder__Contract_One year | onehotencoder__Contract_Two year | onehotencoder__PaperlessBilling_Yes | onehotencoder__PaymentMethod_Credit card (automatic) | onehotencoder__PaymentMethod_Electronic check | onehotencoder__PaymentMethod_Mailed check | remainder__SeniorCitizen | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4712 | 1 | 64 | 102.10 | 6688.10 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1 | 0 |
| 5934 | 0 | 1 | 84.85 | 84.85 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | 1 |
| 6163 | 0 | 10 | 70.10 | 659.65 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0 | 1 |
| 5758 | 0 | 65 | 59.80 | 3808.20 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0 | 0 |
| 212 | 0 | 61 | 19.75 | 1124.20 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | ... | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0 | 0 |
5 rows × 32 columns
test_data.shape
(1758, 32)
# Applying a count to know the number of churned and not churned customers
count_not_churned, count_churned = train_data['Churn'].value_counts()
count_not_churned, count_churned
(3872, 1402)
count_churned
1402
# Performing splits to balance the data
X = train_data.drop('Churn',axis='columns')
y = train_data['Churn']
# Using the SMOTE balancing to balance data
smote = SMOTE(sampling_strategy='minority')
X_sm, y_sm = smote.fit_resample(X, y)
y_sm.value_counts()
0 3872 1 3872 Name: Churn, dtype: int64
# Further splitting of balanced data
X_train_sm, X_test_sm, y_train_sm, y_test_sm = train_test_split(X_sm, y_sm, test_size=0.2,random_state=15, stratify=y_sm)
#Scaling the train_data set
columns_to_scale = ['tenure','MonthlyCharges','TotalCharges']
scaler = MinMaxScaler()
train_data[columns_to_scale] = scaler.fit_transform(train_data[columns_to_scale])
#Scaling the test_data set
columns_to_scale = ['tenure','MonthlyCharges','TotalCharges']
scaler = MinMaxScaler()
test_data[columns_to_scale] = scaler.fit_transform(test_data[columns_to_scale])
# Logistic Regression
log_reg = LogisticRegression()
log_reg_model = log_reg.fit(X_train_sm, y_train_sm)
warnings.filterwarnings('ignore')
/Users/Admin/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning:
lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
## Feature Importance of the Random Forest Model
log_reg_importance = log_reg_model.coef_[0]
log_reg_importance = pd.DataFrame(log_reg_importance, index = X.columns)
log_reg_importance.reset_index(inplace = True)
log_reg_importance.rename(columns = {"index": "Feature", 0:"Score"}, inplace = True)
log_reg_importance.sort_values(by = "Score", ascending = False, inplace = True)
log_reg_importance
| Feature | Score | |
|---|---|---|
| 8 | onehotencoder__MultipleLines_No phone service | 0.773764 |
| 30 | remainder__SeniorCitizen | 0.563929 |
| 26 | onehotencoder__PaperlessBilling_Yes | 0.501136 |
| 10 | onehotencoder__InternetService_Fiber optic | 0.480647 |
| 28 | onehotencoder__PaymentMethod_Electronic check | 0.418456 |
| 21 | onehotencoder__StreamingTV_Yes | 0.156438 |
| 9 | onehotencoder__MultipleLines_Yes | 0.130062 |
| 23 | onehotencoder__StreamingMovies_Yes | 0.118922 |
| 4 | onehotencoder__gender_Male | 0.090496 |
| 29 | onehotencoder__PaymentMethod_Mailed check | 0.026592 |
| 2 | MonthlyCharges | 0.015397 |
| 3 | TotalCharges | 0.000488 |
| 5 | onehotencoder__Partner_Yes | -0.012596 |
| 27 | onehotencoder__PaymentMethod_Credit card (auto... | -0.020909 |
| 20 | onehotencoder__StreamingTV_No internet service | -0.055022 |
| 18 | onehotencoder__TechSupport_No internet service | -0.055022 |
| 11 | onehotencoder__InternetService_No | -0.055022 |
| 16 | onehotencoder__DeviceProtection_No internet se... | -0.055022 |
| 14 | onehotencoder__OnlineBackup_No internet service | -0.055022 |
| 12 | onehotencoder__OnlineSecurity_No internet service | -0.055022 |
| 22 | onehotencoder__StreamingMovies_No internet ser... | -0.055022 |
| 1 | tenure | -0.077803 |
| 6 | onehotencoder__Dependents_Yes | -0.163997 |
| 15 | onehotencoder__OnlineBackup_Yes | -0.225331 |
| 17 | onehotencoder__DeviceProtection_Yes | -0.283030 |
| 24 | onehotencoder__Contract_One year | -0.478223 |
| 7 | onehotencoder__PhoneService_Yes | -0.532319 |
| 19 | onehotencoder__TechSupport_Yes | -0.767221 |
| 13 | onehotencoder__OnlineSecurity_Yes | -0.798669 |
| 25 | onehotencoder__Contract_Two year | -0.814633 |
| 0 | SeniorCitizen | -1.036961 |
y_pred = log_reg.predict(X_test_sm)
y_true = y_test_sm
# Confusion Matrix for Logistic regression model
cm_lr = confusion_matrix(y_true,y_pred)
f, ax = plt.subplots(figsize=(8,8))
sns.heatmap(cm_lr, annot=True, linewidth=0.5, fmt=".0f",cmap='RdPu',ax = ax)
plt.xlabel = ('y_pred')
plt.ylabel = ('y_true')
plt.show()
# Evaluating the model
smote_report = classification_report(y_true, y_pred, target_names= ["Stayed", "Churned"])
print(smote_report)
precision recall f1-score support
Stayed 0.78 0.78 0.78 775
Churned 0.78 0.79 0.78 774
accuracy 0.78 1549
macro avg 0.78 0.78 0.78 1549
weighted avg 0.78 0.78 0.78 1549
# Finding the F2 Score
fbeta_score(y_true, y_pred, average='binary', beta= 0.9)
0.781624216941063
# Specifying the Model name and setting default parameters
rfc = RandomForestClassifier(random_state=101)
rfc = rfc.fit(X_train_sm, y_train_sm)
# Feature Importance of the Random Forest Model
rf_importance = rfc.feature_importances_
rf_importance = pd.DataFrame(rf_importance, columns = ["score"]).reset_index()
rf_importance["Feature"] = list(X.columns)
rf_importance.drop(columns = ["index"], inplace = True)
rf_importance.sort_values(by = "score", ascending = False, ignore_index = True, inplace = True)
## Visualizing the feature importances
fig = px.bar(rf_importance, x = "Feature", y = "score")
fig.show()
rfc_pred = rfc.predict(X_test_sm)
rfc_true = y_test_sm
# Confusion matrix for RandomForest classifier
cm_rfc = confusion_matrix(rfc_true,rfc_pred)
f, ax = plt.subplots(figsize=(8,8))
sns.heatmap(cm_rfc, annot=True, linewidth=0.5, fmt=".0f",cmap='RdPu',ax = ax)
plt.xlabel = ('rfc_pred')
plt.ylabel = ('rfc_true')
plt.show()
# Evaluating the model
smote_report = classification_report(rfc_true, rfc_pred, target_names= ["Stayed", "Churned"])
print(smote_report)
precision recall f1-score support
Stayed 0.85 0.91 0.88 775
Churned 0.90 0.84 0.87 774
accuracy 0.87 1549
macro avg 0.88 0.87 0.87 1549
weighted avg 0.88 0.87 0.87 1549
fbeta_score(rfc_true, rfc_pred, average='binary', beta= 0.9)
0.872068642467322
# Specifying the Model name and setting default parameters
dt = DecisionTreeClassifier(random_state=100)
dt = dt.fit(X_train_sm, y_train_sm)
dt_pred = dt.predict(X_test_sm)
dt_true = y_test_sm
#Confusion Matrix for Decision Tree Classifier
cm_dt = confusion_matrix(dt_true,dt_pred)
f, ax = plt.subplots(figsize=(8,8))
sns.heatmap(cm_dt, annot=True, linewidth=0.5, fmt=".0f",cmap='RdPu',ax = ax)
plt.xlabel = ('dt_pred')
plt.ylabel = ('dt_true')
plt.show()
# Evaluating the model
smote_report = classification_report(dt_true, dt_pred, target_names= ["Stayed", "Churned"])
print(smote_report)
precision recall f1-score support
Stayed 0.81 0.83 0.82 775
Churned 0.83 0.80 0.82 774
accuracy 0.82 1549
macro avg 0.82 0.82 0.82 1549
weighted avg 0.82 0.82 0.82 1549
fbeta_score(dt_true, dt_pred, average='binary', beta= 0.9)
0.8183447755344934
# Specifying the Model name and setting default parameters
knn = KNeighborsClassifier(n_neighbors = 15)
knn = knn.fit(X_train_sm, y_train_sm)
knn_pred = knn.predict(X_test_sm)
knn_true = y_test_sm
# Confusion Matrix for KNN
cm_knn = confusion_matrix(knn_true,knn_pred)
f, ax = plt.subplots(figsize=(8,8))
sns.heatmap(cm_knn, annot=True, linewidth=0.5, fmt=".0f",cmap='RdPu',ax = ax)
plt.xlabel = ('knn_pred')
plt.ylabel = ('knn_true')
plt.show()
# Evaluating the model
smote_report = classification_report(knn_true, knn_pred, target_names= ["Stayed", "Churned"])
print(smote_report)
precision recall f1-score support
Stayed 0.78 0.71 0.75 775
Churned 0.74 0.80 0.77 774
accuracy 0.76 1549
macro avg 0.76 0.76 0.76 1549
weighted avg 0.76 0.76 0.76 1549
fbeta_score(knn_true, knn_pred, average='binary', beta= 0.9)
0.7643352750281749
# Specifying the model name and setting default parameters
sgd = SGDClassifier(loss= 'modified_huber', shuffle= True, random_state = 101)
sgd = sgd.fit(X_train_sm, y_train_sm)
sgd_pred = sgd.predict(X_test_sm)
sgd_true = y_test_sm
# Confusion matrix for Stochastic Gradient Descent Model
cm_sgd = confusion_matrix(sgd_true,sgd_pred)
f, ax = plt.subplots(figsize=(8,8))
sns.heatmap(cm_sgd, annot=True, linewidth=0.5, fmt=".0f",cmap='RdPu',ax = ax)
plt.xlabel = ('sgd_pred')
plt.ylabel = ('sgd_true')
plt.show()
# Evaluating the model
smote_report = classification_report(sgd_true, sgd_pred, target_names= ["Stayed", "Churned"])
print(smote_report)
precision recall f1-score support
Stayed 0.67 0.86 0.76 775
Churned 0.81 0.58 0.68 774
accuracy 0.72 1549
macro avg 0.74 0.72 0.72 1549
weighted avg 0.74 0.72 0.72 1549
fbeta_score(sgd_true, sgd_pred, average='binary', beta= 0.9)
0.6872213841381766
# Defining the values and setting the grid to be used in the RandomizedSearch
random_grid = {
"n_estimators":list(range(10, 101, 10)),
"max_depth": [1, 5, 10, 20, 50, 75, 100, 150, 200],
"bootstrap": [True, False],
"criterion": ["gini", "entropy"],
"max_features": ["sqrt", "log2", None],
"random_state": [24]
}
# Running the RandomizedSearch Cross-Validation with the grid
rf_rscv = RandomizedSearchCV(estimator= rfc,
param_distributions= random_grid,
n_iter= 30,
cv= 10,
random_state= 24,
n_jobs= 5)
rf_rscv.fit(X_train_sm,y_train_sm)
warnings.filterwarnings('ignore')
# Looking at the best combination of hyperparameters for the model
best_params = rf_rscv.best_params_
print("The best combination of hyperparameters for the model will be:")
for param_name in sorted(best_params.keys()):
print(f"{param_name}: {best_params[param_name]}")
The best combination of hyperparameters for the model will be: bootstrap: True criterion: gini max_depth: 10 max_features: log2 n_estimators: 80 random_state: 24
# Viewing the best parameters after cross validation
best_params
{'random_state': 24,
'n_estimators': 80,
'max_features': 'log2',
'max_depth': 10,
'criterion': 'gini',
'bootstrap': True}
# Defining the best version of the model with the best parameters
best_rf_model = RandomForestClassifier(random_state= 24,
n_estimators = 80,
max_features = 'log2',
max_depth = 10,
criterion = 'gini',
bootstrap = True)
# Discovering the best score for the Random forest cross validation
rf_rscv.best_score_
0.8590807233310752
rf_rscv.score(X_train_sm,y_train_sm)
0.9075060532687651
# Defining the best version of the model with the best parameters
best_rf_model = RandomForestClassifier(random_state= 24,
n_estimators = 80,
max_features = 'log2',
max_depth = 10,
criterion = 'gini',
bootstrap = True)
# Fitting the model to the training data
best_rf_model = best_rf_model.fit(X_train_sm,y_train_sm)
# Predicting the test data
best_rf_pred = best_rf_model.predict(X_test_sm)
# Evaluating the model
best_rf_report = classification_report(y_test_sm, best_rf_pred, target_names=["Stayed", "Churned"])
print(best_rf_report)
precision recall f1-score support
Stayed 0.87 0.88 0.87 775
Churned 0.88 0.87 0.87 774
accuracy 0.87 1549
macro avg 0.87 0.87 0.87 1549
weighted avg 0.87 0.87 0.87 1549
# Calculating the accuracy score
accuracy = accuracy_score(y_test_sm, best_rf_pred)
accuracy = "{:.5f}".format(accuracy)
print("Accuracy:", accuracy)
Accuracy: 0.87282
# Calculating the F2 Score
f2_score = fbeta_score(y_test_sm, best_rf_pred, beta= 0.5)
f2_score = "{:.5f}".format(f2_score)
print("F2 Score:", f2_score)
F2 Score: 0.87454
# Confusion Matrix
best_rf_conf_mat = confusion_matrix(y_test_sm, best_rf_pred)
best_rf_conf_mat = (pd.DataFrame(best_rf_conf_mat).reset_index(drop=True)).rename(columns={0: "Stayed", 1: "Churned"})
best_rf_conf_mat
| Stayed | Churned | |
|---|---|---|
| 0 | 680 | 95 |
| 1 | 102 | 672 |
# Visualizing the Confusion Matrix
f, ax = plt.subplots()
sns.heatmap(best_rf_conf_mat,
annot= True,
linewidth= 1.0,
fmt= ".0f",
cmap= "RdPu",
ax= ax)
plt.show()
# Cross-validating the scores of the best model
final_cv_score = cross_val_score(best_rf_model,
X_train_sm, y_train_sm,
cv= 10).mean()
final_cv_score = "{0:.5}".format(final_cv_score)
print("Average score of the model on cross-validation is:", final_cv_score)
Average score of the model on cross-validation is: 0.85908
# Defining the values and setting the grid to be used in the GridSearch
gscv_param_grid = {
"n_estimators": list(range(50, 400, 20)),
"max_features": ["sqrt", "log2"],
"max_depth": [50, 100, 150, 200, 300],
"criterion": ["gini", "entropy"],
"random_state": [24],
"bootstrap": [True],
}
# Running the GridSearch Cross-Validation with the specified grid
rf_gscv = GridSearchCV(estimator= rfc,
param_grid= gscv_param_grid,
n_jobs= -1,
cv= 10
)
# Fitting the model to the training data
rf_gscv.fit(X_train_sm, y_train_sm)
GridSearchCV(cv=10, estimator=RandomForestClassifier(random_state=101),
n_jobs=-1,
param_grid={'bootstrap': [True], 'criterion': ['gini', 'entropy'],
'max_depth': [50, 100, 150, 200, 300],
'max_features': ['sqrt', 'log2'],
'n_estimators': [50, 70, 90, 110, 130, 150, 170, 190,
210, 230, 250, 270, 290, 310, 330,
350, 370, 390],
'random_state': [24]})
# Looking at the best combination of hyperparameters for the model
best_params = rf_gscv.best_params_
print("The best combination of hyperparameters for the model will be:")
for param_name in sorted(best_params.keys()):
print(f"{param_name}: {best_params[param_name]}")
The best combination of hyperparameters for the model will be: bootstrap: True criterion: entropy max_depth: 50 max_features: log2 n_estimators: 390 random_state: 24
# Viewing the best parameters after cross validation
best_params
{'bootstrap': True,
'criterion': 'entropy',
'max_depth': 50,
'max_features': 'log2',
'n_estimators': 390,
'random_state': 24}
# Defining the best version of the model with the best parameters
best_grid_rf_model = RandomForestClassifier(random_state= 24,
n_estimators = 390,
max_features = 'log2',
max_depth = 50,
criterion = 'entropy',
bootstrap = True)
# Discovering the best score for the Random forest cross validation
rf_gscv.best_score_
0.8598874355099276
rf_gscv.score(X_train_sm,y_train_sm)
0.9988700564971752
# Defining the best version of the model with the best parameters
best_grid_rf_model = RandomForestClassifier(random_state= 24,
n_estimators = 390,
max_features = 'log2',
max_depth = 50,
criterion = 'entropy',
bootstrap = True)
# Fitting the model to the training data
best_grid_rf_model = best_grid_rf_model.fit(X_train_sm,y_train_sm)
# Predicting the test data
best_grid_rf_pred = best_grid_rf_model.predict(X_test_sm)
# Evaluating the model
best_grid_rf_report = classification_report(y_test_sm, best_grid_rf_pred, target_names=["Stayed", "Churned"])
print(best_grid_rf_report)
precision recall f1-score support
Stayed 0.85 0.91 0.88 775
Churned 0.90 0.84 0.87 774
accuracy 0.87 1549
macro avg 0.88 0.87 0.87 1549
weighted avg 0.88 0.87 0.87 1549
# Calculating the accuracy score
accuracy = accuracy_score(y_test_sm, best_grid_rf_pred)
accuracy = "{:.5f}".format(accuracy)
print("Accuracy:", accuracy)
Accuracy: 0.87411
# Calculating the F2 Score
f2_score = fbeta_score(y_test_sm, best_grid_rf_pred, beta= 0.5)
f2_score = "{:.5f}".format(f2_score)
print("F2 Score:", f2_score)
F2 Score: 0.88904
# Confusion Matrix
best_grid_rf_cm = confusion_matrix(y_test_sm, best_grid_rf_pred)
best_grid_rf_cm = (pd.DataFrame(best_grid_rf_cm).reset_index(drop=True)).rename(columns={0: "Stayed", 1: "Churned"})
best_grid_rf_cm
| Stayed | Churned | |
|---|---|---|
| 0 | 705 | 70 |
| 1 | 125 | 649 |
# Visualizing the Confusion Matrix
f, ax = plt.subplots()
sns.heatmap(best_grid_rf_cm,
annot= True,
linewidth= 1.0,
fmt= ".0f",
cmap= "RdPu",
ax= ax)
plt.show()
# Defining the target & predictor variables
test_X = test_data.drop(columns=["Churn"])
test_y = test_data["Churn"]
# Balancing the test data using SMOTE
smote2 = SMOTE(sampling_strategy='minority')
X_test_sm, y_test_sm = smote.fit_resample(test_X, test_y)
y_sm.value_counts()
0 3872 1 3872 Name: Churn, dtype: int64
# Predicting the test data
sm_pred = best_grid_rf_model.predict(X_test_sm)
# Evaluating the model
smtest_report = classification_report(y_test_sm,sm_pred, target_names=["Stayed","Churned"])
print(smtest_report)
precision recall f1-score support
Stayed 0.83 0.72 0.77 1291
Churned 0.75 0.85 0.80 1291
accuracy 0.79 2582
macro avg 0.79 0.79 0.78 2582
weighted avg 0.79 0.79 0.78 2582
# Calculating the F2 Score
f2_score = fbeta_score(y_test_sm,sm_pred, beta=0.5)
f2_score = "{:.5f}".format(f2_score)
print("F2 Score:", f2_score)
F2 Score: 0.77025
# Confusion Matrix
conf_mat = confusion_matrix(y_test_sm,sm_pred)
conf_mat = (pd.DataFrame(conf_mat).reset_index(
drop=True)).rename(columns={0: "Stayed", 1: "Churned"})
conf_mat
| Stayed | Churned | |
|---|---|---|
| 0 | 928 | 363 |
| 1 | 190 | 1101 |
# Visualizing the Confusion Matrix
f, ax = plt.subplots()
sns.heatmap(conf_mat,
annot=True,
linewidth=1.0,
fmt=".0f",
cmap="RdPu",
ax=ax)
plt.show()